# Separate out offshore
tot.prod.by.fed.off = dt.panel[, .(liq=sum(liq)/(365.25/12)/1e6, gas=sum(gas)/(365.25/12)/1e6), 
                               by=.(prod_date, fed.land, offshore)]
tot.prod.by.fed.off = tot.prod.by.fed.off[order(fed.land, offshore, prod_date)]
tot.prod.by.fed.off[, land.type := ifelse(fed.land==1, 'Federal','NonFederal')]
tot.prod.by.fed.off[, offshore := ifelse(offshore==1, 'Offshore','Onshore')]
tot.prod.by.fed.off = dcast(tot.prod.by.fed.off, prod_date ~ land.type + offshore, value.var=c('liq','gas'))
tot.prod.by.fed.off[, liq_tot := liq_Federal_Onshore + liq_Federal_Offshore + liq_NonFederal_Onshore + liq_NonFederal_Offshore]
tot.prod.by.fed.off[, gas_tot := gas_Federal_Onshore + gas_Federal_Offshore + gas_NonFederal_Onshore + gas_NonFederal_Offshore]

# Disaggregate by fed/offshore/prod_type
tot.prod.by.fed.off.type = dt.panel[, .(liq=sum(liq)/(365.25/12)/1e6, gas=sum(gas)/(365.25/12)/1e6), 
                                    by=.(prod_date, fed.land, offshore, prod_type)]
tot.prod.by.fed.off.type = tot.prod.by.fed.off.type[order(fed.land, offshore, prod_date)]
tot.prod.by.fed.off.type[, land.type := ifelse(fed.land==1, 'Federal','NonFederal')]
tot.prod.by.fed.off.type[, offshore := ifelse(offshore==1, 'Offshore','Onshore')]

# Remove dt.panel to save on memory, since we don't need it for the time being.
rm(dt.panel)
gc()

# Comparing to ONRR data, these match almost exactly (e.g., 0.42 mb/d vs 0.44 in ONR data)
# on everything except onshore federal gas, where we're undercounting somewhat
# (like 5.9 bcf/d vs 8.6 in ONR data).
# This is largely due to old gas wells out west which were drilled predating our 
# sample (1990+).
tot.prod.by.fed.off[prod_date=='2017-01-01',
                    .(prod_date, liq_Federal_Onshore,
                      liq_Federal_Offshore, gas_Federal_Onshore,
                      gas_Federal_Offshore)]

### Add in old gas data
# this reads in data on old gas wells in the big 4 federal gas producing
# states (WY, CO, NM, UT) and calculates tot.prod.by.fed.old.gas.states,
# which is a welltype-year panel on oil and gas production from those wells.
# This is ONLY for  generating the appendix figure "Production_Drillinginfo_vs_ONRR"
# and plays no other role in the analysis.
source(script.dir%&%'examine_old_gas_wells.R') 
sapply(tot.prod.by.fed.old.gas.states, class)
tot.prod.by.fed.old.gas.states = tot.prod.by.fed.old.gas.states[prod_date %in% as.Date(tot.prod.by.fed.off[, unique(prod_date)])]
setcolorder(tot.prod.by.fed.off, names(tot.prod.by.fed.old.gas.states))

# Add production values for the 4 states with old gas wells (pre 1990)
tot.fed.prod.final = 
  tot.prod.by.fed.off[, -c('prod_date'), with=F] + 
  tot.prod.by.fed.old.gas.states[, -c('prod_date'), with=F]
tot.fed.prod.final = cbind(tot.prod.by.fed.off[, .(prod_date=prod_date)], tot.fed.prod.final)

# a much smaller share of US oil and gas production comes from federal land 
# (22 percent of oil and 12 percent of gas in 2018) compared to coal ...
prod.2018 = tot.fed.prod.final[year(prod_date)==2018, lapply(.SD, mean)]
prod.2018[, (liq_Federal_Onshore + liq_Federal_Offshore)/liq_tot]
prod.2018[, (gas_Federal_Onshore + gas_Federal_Offshore)/gas_tot]

onrr.prod = fread(data.dir%&%'/ONRR/monthly_production.csv')
onrr.prod = onrr.prod[-grep('Coal', Commodity)]
onrr.prod[, Volume := gsub(',','',Volume)]
onrr.prod[, Volume := as.numeric(Volume)]
onrr.prod[, date := Month %&% '/' %&% `Calendar Year`]
onrr.prod[, date := parse_date_time(date, orders='by')]
onrr.prod[, date := as.Date(date)]
onrr.prod[grep('Gas', Commodity), prod_type :=  'Gas']
onrr.prod[grep('Oil', Commodity), prod_type :=  'Oil']
onrr.prod[grep('Gas', Commodity), production := Volume /1e6/(365.25/12)] # mcf -> bcf/d
onrr.prod[grep('Oil', Commodity), production := Volume /1e6/(365.25/12)] # bbl -> mln bpd
onrr.prod = dcast(onrr.prod, date ~ prod_type + `Land Class` + `Land Category`, value.var='production')

gas.compare = cbind(tot.fed.prod.final[year(prod_date)==2018, 
                                       .(prod_date, DI_gas = gas_Federal_Onshore)],
                    onrr.prod[year(date)==2018, .(date, ONRR_gas = Gas_Federal_Onshore)])
tot.fed.prod.final[, prod_date:=as.Date(prod_date)]

palette(c(rgb(4, 39, 60, maxColorValue=255), # RFF black
          rgb(116,100,94, maxColorValue=255), # RFF brown
          rgb(80,177,97, maxColorValue=255), # RFF green
          rgb(136,196,244, maxColorValue=255), # RFF blue
          rgb(255, 102, 99, maxColorValue=255), # RFF coral
          rgb(118, 94, 165, maxColorValue=255), # RFF purple
          rgb(244, 162, 95, maxColorValue=255), # RFF orange
          rgb(235, 211, 103, maxColorValue=255), # RFF yellow
          rgb(224, 228, 231, maxColorValue=255))) # RFF light gray

# Plot Drillinginfo federal production vs ONRR reported production
pdf(output.dir%&%'/Production_Drillinginfo_vs_ONRR_'%&%today()%&%'.pdf', family='serif', width=14, height=8)
par(mfrow=c(1,2)) # 
par(cex=1.25)
plot(Oil_Federal_Onshore ~ date, data=onrr.prod, type='l', col=5, lwd=2, ylim=c(0,2.5),
     main='Federal Oil Production', ylab='Million barrels per day', xlab='Month')
grid()
lines(Oil_Federal_Offshore ~ date, data=onrr.prod, type='l', col=5, lwd=2, lty=2)
lines(liq_Federal_Onshore ~ prod_date, data=tot.fed.prod.final[prod_date<='2019-11-01'], col=4, lwd=2, lty=1)
lines(liq_Federal_Offshore ~ prod_date, data=tot.fed.prod.final[prod_date<='2019-11-01'], col=4, lwd=2, lty=2)

legend('topleft', legend=c('ONRR Data', 'Drillinginfo Data'), lty=1, col=c(5,4), lwd=2, bg='white')
legend('topright', legend=c('Onshore','Offshore'), lty=1:2, col='black', lwd=2, bg='white')

plot(Gas_Federal_Onshore ~ date, data=onrr.prod, type='l', col=5, lwd=2, ylim=c(0,15),
     main='Federal Gas Production', ylab='Billion cubic feet per day', xlab='Month')
grid()
lines(Gas_Federal_Offshore ~ date, data=onrr.prod, type='l', col=5, lwd=2, lty=2)
lines(gas_Federal_Onshore ~ prod_date, data=tot.fed.prod.final[prod_date<='2019-11-01'], col=4, lwd=2, lty=1)
lines(gas_Federal_Offshore ~ prod_date, data=tot.fed.prod.final[prod_date<='2019-11-01'], col=4, lwd=2, lty=2)
dev.off()

# Plot historical production from each well type, focusing on 2000-2020
col_lookup = c('GasFederal'='#04273C', 'GasNonfederal'='#88C4F4',
               'OilFederal'='#FF6663', 'OilNonfedereal'='#EAD367')
# col_lookup[1:2] = colorspace::darken(col_lookup[1:2], 0.1)
# col_lookup[3:4] = colorspace::lighten(col_lookup[3:4], 0.1)
palette(col_lookup) # Order: gas fed, gas nonfed, oil fed, oil nonfed
my.cols = alpha(col_lookup[4:1], 1)
names(my.cols) = names(col_lookup[4:1])

tot.prod.by.fed.off.type[, prod_date:=as.Date(prod_date)]

hist.prod.plot = tot.prod.by.fed.off.type[year(prod_date)>=2000]
hist.prod.plot = hist.prod.plot[year(prod_date)<=2018] # start of simulations

for (lg in c('liq','gas')) {
  pdf(output.dir%&%lg%&%'_historical_production_'%&%today()%&%'.pdf', family='serif', width=11, height=8.5)
  form.temp = as.formula(paste0(lg,' ~ prod_date'))
  plot(form.temp, data=hist.prod.plot[fed.land==1 & offshore=='Onshore' & prod_type=='Gas'], type='l', lwd=3, col=1,
       xlab='', ylab=ifelse(lg=='liq','Oil Production (mb/d)','Gas Production (bcf/d)'),
       xlim=c(as.Date('2000-01-01'), as.Date('2021-01-01')),
       ylim=c(0,ifelse(lg=='liq',8,70)), xaxt='n', cex.lab=1.5, cex.axis=1.5)
  grid(nx=NA, ny=NULL)
  if (lg=='liq') yrng = seq(0,8,by=2)
  if (lg=='gas') yrng = seq(0,70,by=10)
  abline(h=yrng, lty=3, col='lightgray')
  abline(v=seq.Date(as.Date('2000-01-01'), as.Date('2018-01-01'), by='2 years'), lty=3, col='lightgray')
  axis(side=1, at=seq.Date(as.Date('2000-01-01'), as.Date('2019-01-01'), by='2 years'),
       labels = seq(2000, 2018, by=2), cex.axis=1.5)

  lines(form.temp, data=hist.prod.plot[fed.land==0 & offshore=='Onshore' & prod_type=='Gas'], lwd=3, col=2)
  lines(form.temp, data=hist.prod.plot[fed.land==1 & offshore=='Onshore' & prod_type=='Oil'], lwd=3, col=3)
  lines(form.temp, data=hist.prod.plot[fed.land==0 & offshore=='Onshore' & prod_type=='Oil'], lwd=3, col=4)
  lines(form.temp, data=hist.prod.plot[fed.land==1 & offshore=='Offshore' & prod_type=='Gas'], lwd=3, col=1, lty=2)
  lines(form.temp, data=hist.prod.plot[fed.land==0 & offshore=='Offshore' & prod_type=='Gas'], lwd=3, col=2, lty=2)
  lines(form.temp, data=hist.prod.plot[fed.land==1 & offshore=='Offshore' & prod_type=='Oil'], lwd=3, col=3, lty=2)
  lines(form.temp, data=hist.prod.plot[fed.land==0 & offshore=='Offshore' & prod_type=='Oil'], lwd=3, col=4, lty=2)
  if (lg=='liq') {
    ylev = hist.prod.plot[prod_date=='2018-12-01'][c(2,7,1,6), get(lg)] + c(-0.2, -0.2, 0, -0.2)
    ytext = c('Nonfederal\nOnshore Oil','Federal\nOffshore Oil','Nonfederal\nOnshore Gas','Federal\nOnshore Oil')
    ycol = col_lookup[c(4,3,2,3)]
  }
  if (lg=='gas') {
    ylev = hist.prod.plot[prod_date=='2018-12-01'][c(1,2,5), get(lg)] + c(-4, -1, 0)
    ytext = c('Nonfederal\nOnshore Gas','Nonfederal\nOnshore Oil','Federal\nOnshore Gas')
    ycol = col_lookup[c(2,4,1)]
  }
  text(x=as.Date('2019-01-01'), y=ylev, col=ycol, labels=ytext, pos=4, font=2)
  legend('topleft',legend=c('Oil Wells, Nonfederal', 'Oil Wells, Federal',
                            'Gas Wells, Nonfederal', 'Gas Wells, Federal'),
         fill=my.cols, cex=1.5, bg='white')
  legend('top',legend=c('Onshore', 'Offshore'), col=c('black'),
         lty=1:2, lwd=3,  cex=1.5, bg='white')
  dev.off()
}




